import requests
import time
from ollama import chat

# Reuse one Session (avoids re-establishing a TCP connection per request).
# NOTE(review): `session` and `url` are never used in this file — the answer
# is fetched via the `ollama.chat` client below, not via raw HTTP. Confirm
# they are needed by code outside this view, or remove them.
session = requests.Session()
url = "http://localhost:11434/api/chat"

def get_response():
    """Stream one chat completion from the local ``qwq:32b`` model and print
    only the answer text that follows the model's ``<think>...</think>`` block.

    Side effects: writes the filtered answer to stdout as it streams, then
    prints the elapsed wall-clock time. Returns ``None``.
    """
    start_time = time.time()
    stream = chat(
        model='qwq:32b',
        messages=[{'role': 'user', 'content': '天空为什么是蓝色,不要思考'}],
        stream=True,
    )
    # qwq wraps its reasoning in <think>...</think>. The 1st chunk containing
    # "think" carries the opening tag, the 2nd the closing tag; everything up
    # to AND including the closing-tag chunk is suppressed. (The previous
    # version printed the chunk carrying </think> itself, leaking the tag
    # into the output, and kept counting "think" inside the answer text.)
    # NOTE(review): assumes the model always emits both tags — if it emits
    # none, nothing is printed (same as the original behavior); confirm.
    tag_count = 0
    past_think_block = False
    for chunk in stream:
        content = chunk['message']['content']
        if not past_think_block:
            if 'think' in content:
                tag_count += 1
                past_think_block = tag_count >= 2
            continue  # still inside (or on the boundary of) the think block
        print(content, end='', flush=True)
    print()  # terminate the streamed line before the timing report
    print(f"耗时: {time.time() - start_time:.2f}秒")

# Run only when executed as a script, so importing this module does not
# trigger a network call to the local model.
if __name__ == "__main__":
    get_response()  # first call may be slower (model load / cold connection)
    # get_response()  # a second call would be faster (warm model)